/*
 *   This program is free software: you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation, either version 3 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *   GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

/*
 *    Cobweb.java
 *    Copyright (C) 2001-2012 University of Waikato, Hamilton, New Zealand
 *
 */

package weka.clusterers;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Enumeration;
import java.util.Random;
import java.util.Vector;

import weka.core.AttributeStats;
import weka.core.Capabilities;
import weka.core.Capabilities.Capability;
import weka.core.Drawable;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.TechnicalInformation;
import weka.core.TechnicalInformation.Field;
import weka.core.TechnicalInformation.Type;
import weka.core.TechnicalInformationHandler;
import weka.core.Utils;
import weka.experiment.Stats;
import weka.filters.Filter;
import weka.filters.unsupervised.attribute.Add;

/**
 * <!-- globalinfo-start --> Class implementing the Cobweb and Classit
 * clustering algorithms.<br/>
 * <br/>
 * Note: the application of node operators (merging, splitting etc.) in terms of
 * ordering and priority differs (and is somewhat ambiguous) between the
 * original Cobweb and Classit papers. This algorithm always compares the best
 * host, adding a new leaf, merging the two best hosts, and splitting the best
 * host when considering where to place a new instance.<br/>
 * <br/>
 * For more information see:<br/>
 * <br/>
 * D. Fisher (1987). Knowledge acquisition via incremental conceptual
 * clustering. Machine Learning. 2(2):139-172.<br/>
 * <br/>
 * J. H. Gennari, P. Langley, D. Fisher (1990). Models of incremental concept
 * formation. Artificial Intelligence. 40:11-61.
 * <p/>
 * <!-- globalinfo-end -->
 * 
 * <!-- technical-bibtex-start --> BibTeX:
 * 
 * <pre>
 * &#64;article{Fisher1987,
 *    author = {D. Fisher},
 *    journal = {Machine Learning},
 *    number = {2},
 *    pages = {139-172},
 *    title = {Knowledge acquisition via incremental conceptual clustering},
 *    volume = {2},
 *    year = {1987}
 * }
 * 
 * &#64;article{Gennari1990,
 *    author = {J. H. Gennari and P. Langley and D. Fisher},
 *    journal = {Artificial Intelligence},
 *    pages = {11-61},
 *    title = {Models of incremental concept formation},
 *    volume = {40},
 *    year = {1990}
 * }
 * </pre>
 * <p/>
 * <!-- technical-bibtex-end -->
 * 
 * <!-- options-start --> Valid options are:
 * <p/>
 * 
 * <pre>
 *  -A &lt;acuity&gt;
 *  Acuity.
 *  (default=1.0)
 * </pre>
 * 
 * <pre>
 *  -C &lt;cutoff&gt;
 *  Cutoff.
 *  (default=0.002)
 * </pre>
 * 
 * <pre>
 *  -save-data
 *  Save instance data.
 * </pre>
 * 
 * <pre>
 *  -S &lt;num&gt;
 *  Random number seed.
 *  (default 42)
 * </pre>
 * 
 * <pre>
 *  -output-debug-info
 *  If set, clusterer is run in debug mode and
 *  may output additional info to the console
 * </pre>
 * 
 * <pre>
 *  -do-not-check-capabilities
 *  If set, clusterer capabilities are not checked before clusterer is built
 *  (use with caution).
 * </pre>
 * 
 * <!-- options-end -->
 * 
 * @author <a href="mailto:mhall@cs.waikato.ac.nz">Mark Hall</a>
 * @version $Revision$
 * @see RandomizableClusterer
 * @see Drawable
 */
public class Cobweb extends RandomizableClusterer implements Drawable, TechnicalInformationHandler, UpdateableClusterer {

    /** for serialization */
    static final long serialVersionUID = 928406656495092318L;

    /**
     * Inner class handling node operations for Cobweb.
     * 
     * @see Serializable
     */
    public class CNode implements Serializable {

        /** for serialization */
        static final long serialVersionUID = 3452097436933325631L;
        /**
         * Within cluster attribute statistics
         */
        private AttributeStats[] m_attStats;

        /**
         * Number of attributes
         */
        private final int m_numAttributes;

        /**
         * Instances at this node
         */
        protected Instances m_clusterInstances = null;

        /**
         * Children of this node
         */
        private ArrayList<CNode> m_children = null;

        /**
         * Total instances at this node
         */
        private double m_totalInstances = 0.0;

        /**
         * Cluster number of this node
         */
        private int m_clusterNum = -1;

        /**
         * Creates an empty <code>CNode</code> instance.
         * 
         * @param numAttributes the number of attributes in the data
         */
        public CNode(int numAttributes) {
            m_numAttributes = numAttributes;
        }

        /**
         * Creates a new leaf <code>CNode</code> instance.
         * 
         * @param numAttributes the number of attributes in the data
         * @param leafInstance  the instance to store at this leaf
         */
        public CNode(int numAttributes, Instance leafInstance) {
            this(numAttributes);
            if (m_clusterInstances == null) {
                m_clusterInstances = new Instances(leafInstance.dataset(), 1);
            }
            m_clusterInstances.add(leafInstance);
            updateStats(leafInstance, false);
        }

        /**
         * Adds an instance to this cluster.
         * 
         * @param newInstance the instance to add
         * @throws Exception if an error occurs
         */
        protected void addInstance(Instance newInstance) throws Exception {
            // Add the instance to this cluster

            if (m_clusterInstances == null) {
                m_clusterInstances = new Instances(newInstance.dataset(), 1);
                m_clusterInstances.add(newInstance);
                updateStats(newInstance, false);
                return;
            } else if (m_children == null) {
                /*
                 * we are a leaf, so make our existing instance(s) into a child and then add the
                 * new instance as a child
                 */
                m_children = new ArrayList<CNode>();
                CNode tempSubCluster = new CNode(m_numAttributes, m_clusterInstances.instance(0));

                // System.out.println("Dumping "+m_clusterInstances.numInstances());
                for (int i = 1; i < m_clusterInstances.numInstances(); i++) {
                    tempSubCluster.m_clusterInstances.add(m_clusterInstances.instance(i));
                    tempSubCluster.updateStats(m_clusterInstances.instance(i), false);
                }
                m_children = new ArrayList<CNode>();
                m_children.add(tempSubCluster);
                m_children.add(new CNode(m_numAttributes, newInstance));

                m_clusterInstances.add(newInstance);
                updateStats(newInstance, false);

                // here is where we check against cutoff (also check cutoff
                // in findHost)
                if (categoryUtility() < m_cutoff) {
                    // System.out.println("Cutting (leaf add) ");
                    m_children = null;
                }
                return;
            }

            // otherwise, find the best host for this instance
            CNode bestHost = findHost(newInstance, false);
            if (bestHost != null) {
                // now add to the best host
                bestHost.addInstance(newInstance);
            }
        }

        /**
         * Temporarily adds a new instance to each of this nodes children in turn and
         * computes the category utility.
         * 
         * @param newInstance the new instance to evaluate
         * @return an array of category utility values---the result of considering each
         *         child in turn as a host for the new instance
         * @throws Exception if an error occurs
         */
        private double[] cuScoresForChildren(Instance newInstance) throws Exception {
            // look for a host in existing children
            double[] categoryUtils = new double[m_children.size()];

            // look for a home for this instance in the existing children
            for (int i = 0; i < m_children.size(); i++) {
                CNode temp = m_children.get(i);
                // tentitively add the new instance to this child
                temp.updateStats(newInstance, false);
                categoryUtils[i] = categoryUtility();

                // remove the new instance from this child
                temp.updateStats(newInstance, true);
            }
            return categoryUtils;
        }

        private double cuScoreForBestTwoMerged(CNode merged, CNode a, CNode b, Instance newInstance) throws Exception {

            double mergedCU = -Double.MAX_VALUE;
            // consider merging the best and second
            // best.
            merged.m_clusterInstances = new Instances(m_clusterInstances, 1);

            merged.addChildNode(a);
            merged.addChildNode(b);
            merged.updateStats(newInstance, false); // add new instance to stats
            // remove the best and second best nodes
            m_children.remove(m_children.indexOf(a));
            m_children.remove(m_children.indexOf(b));
            m_children.add(merged);
            mergedCU = categoryUtility();
            // restore the status quo
            merged.updateStats(newInstance, true);
            m_children.remove(m_children.indexOf(merged));
            m_children.add(a);
            m_children.add(b);
            return mergedCU;
        }

        /**
         * Finds a host for the new instance in this nodes children. Also considers
         * merging the two best hosts and splitting the best host.
         * 
         * @param newInstance     the instance to find a host for
         * @param structureFrozen true if the instance is not to be added to the tree
         *                        and instead the best potential host is to be returned
         * @return the best host
         * @throws Exception if an error occurs
         */
        private CNode findHost(Instance newInstance, boolean structureFrozen) throws Exception {

            if (!structureFrozen) {
                updateStats(newInstance, false);
            }

            // look for a host in existing children and also consider as a new leaf
            double[] categoryUtils = cuScoresForChildren(newInstance);

            // make a temporary new leaf for this instance and get CU
            CNode newLeaf = new CNode(m_numAttributes, newInstance);
            m_children.add(newLeaf);
            double bestHostCU = categoryUtility();
            CNode finalBestHost = newLeaf;

            // remove new leaf when searching for best and second best nodes to
            // consider for merging and splitting
            m_children.remove(m_children.size() - 1);

            // now determine the best host (and the second best)
            int best = 0;
            int secondBest = 0;
            for (int i = 0; i < categoryUtils.length; i++) {
                if (categoryUtils[i] > categoryUtils[secondBest]) {
                    if (categoryUtils[i] > categoryUtils[best]) {
                        secondBest = best;
                        best = i;
                    } else {
                        secondBest = i;
                    }
                }
            }

            CNode a = m_children.get(best);
            CNode b = m_children.get(secondBest);
            if (categoryUtils[best] > bestHostCU) {
                bestHostCU = categoryUtils[best];
                finalBestHost = a;
                // System.out.println("Node is best");
            }

            if (structureFrozen) {
                if (finalBestHost == newLeaf) {
                    return null; // *this* node is the best host
                } else {
                    return finalBestHost;
                }
            }

            double mergedCU = -Double.MAX_VALUE;
            CNode merged = new CNode(m_numAttributes);
            if (a != b) {
                mergedCU = cuScoreForBestTwoMerged(merged, a, b, newInstance);

                if (mergedCU > bestHostCU) {
                    bestHostCU = mergedCU;
                    finalBestHost = merged;
                }
            }

            // Consider splitting the best
            double splitCU = -Double.MAX_VALUE;
            double splitBestChildCU = -Double.MAX_VALUE;
            double splitPlusNewLeafCU = -Double.MAX_VALUE;
            double splitPlusMergeBestTwoCU = -Double.MAX_VALUE;
            if (a.m_children != null) {
                ArrayList<CNode> tempChildren = new ArrayList<CNode>();

                for (int i = 0; i < m_children.size(); i++) {
                    CNode existingChild = m_children.get(i);
                    if (existingChild != a) {
                        tempChildren.add(existingChild);
                    }
                }
                for (int i = 0; i < a.m_children.size(); i++) {
                    CNode promotedChild = a.m_children.get(i);
                    tempChildren.add(promotedChild);
                }
                // also add the new leaf
                tempChildren.add(newLeaf);

                ArrayList<CNode> saveStatusQuo = m_children;
                m_children = tempChildren;
                splitPlusNewLeafCU = categoryUtility(); // split + new leaf
                // remove the new leaf
                tempChildren.remove(tempChildren.size() - 1);
                // now look for best and second best
                categoryUtils = cuScoresForChildren(newInstance);

                // now determine the best host (and the second best)
                best = 0;
                secondBest = 0;
                for (int i = 0; i < categoryUtils.length; i++) {
                    if (categoryUtils[i] > categoryUtils[secondBest]) {
                        if (categoryUtils[i] > categoryUtils[best]) {
                            secondBest = best;
                            best = i;
                        } else {
                            secondBest = i;
                        }
                    }
                }
                CNode sa = m_children.get(best);
                CNode sb = m_children.get(secondBest);
                splitBestChildCU = categoryUtils[best];

                // now merge best and second best
                CNode mergedSplitChildren = new CNode(m_numAttributes);
                if (sa != sb) {
                    splitPlusMergeBestTwoCU = cuScoreForBestTwoMerged(mergedSplitChildren, sa, sb, newInstance);
                }
                splitCU = (splitBestChildCU > splitPlusNewLeafCU) ? splitBestChildCU : splitPlusNewLeafCU;
                splitCU = (splitCU > splitPlusMergeBestTwoCU) ? splitCU : splitPlusMergeBestTwoCU;

                if (splitCU > bestHostCU) {
                    bestHostCU = splitCU;
                    finalBestHost = this;
                    // tempChildren.remove(tempChildren.size()-1);
                } else {
                    // restore the status quo
                    m_children = saveStatusQuo;
                }
            }

            if (finalBestHost != this) {
                // can commit the instance to the set of instances at this node
                m_clusterInstances.add(newInstance);
            } else {
                m_numberSplits++;
            }

            if (finalBestHost == merged) {
                m_numberMerges++;
                m_children.remove(m_children.indexOf(a));
                m_children.remove(m_children.indexOf(b));
                m_children.add(merged);
            }

            if (finalBestHost == newLeaf) {
                finalBestHost = new CNode(m_numAttributes);
                m_children.add(finalBestHost);
            }

            if (bestHostCU < m_cutoff) {
                if (finalBestHost == this) {
                    // splitting was the best, but since we are cutting all children
                    // recursion is aborted and we still need to add the instance
                    // to the set of instances at this node
                    m_clusterInstances.add(newInstance);
                }
                m_children = null;
                finalBestHost = null;
            }

            if (finalBestHost == this) {
                // splitting is still the best, so downdate the stats as
                // we'll be recursively calling on this node
                updateStats(newInstance, true);
            }

            return finalBestHost;
        }

        /**
         * Adds the supplied node as a child of this node. All of the child's instances
         * are added to this nodes instances
         * 
         * @param child the child to add
         */
        protected void addChildNode(CNode child) {
            for (int i = 0; i < child.m_clusterInstances.numInstances(); i++) {
                Instance temp = child.m_clusterInstances.instance(i);
                m_clusterInstances.add(temp);
                updateStats(temp, false);
            }

            if (m_children == null) {
                m_children = new ArrayList<CNode>();
            }
            m_children.add(child);
        }

        /**
         * Computes the utility of all children with respect to this node
         * 
         * @return the category utility of the children with respect to this node.
         * @throws Exception if there are no children
         */
        protected double categoryUtility() throws Exception {

            if (m_children == null) {
                throw new Exception("categoryUtility: No children!");
            }

            double totalCU = 0;

            for (int i = 0; i < m_children.size(); i++) {
                CNode child = m_children.get(i);
                totalCU += categoryUtilityChild(child);
            }

            totalCU /= m_children.size();
            return totalCU;
        }

        /**
         * Computes the utility of a single child with respect to this node
         * 
         * @param child the child for which to compute the utility
         * @return the utility of the child with respect to this node
         * @throws Exception if something goes wrong
         */
        protected double categoryUtilityChild(CNode child) throws Exception {

            double sum = 0;
            for (int i = 0; i < m_numAttributes; i++) {
                if (m_clusterInstances.attribute(i).isNominal()) {
                    for (int j = 0; j < m_clusterInstances.attribute(i).numValues(); j++) {
                        double x = child.getProbability(i, j);
                        double y = getProbability(i, j);
                        sum += (x - y) * (x + y);
                    }
                } else {
                    // numeric attribute
                    sum += ((m_normal / child.getStandardDev(i)) - (m_normal / getStandardDev(i)));

                }
            }
            return (child.m_totalInstances / m_totalInstances) * sum;
        }

        /**
         * Returns the probability of a value of a nominal attribute in this node
         * 
         * @param attIndex   the index of the attribute
         * @param valueIndex the index of the value of the attribute
         * @return the probability
         * @throws Exception if the requested attribute is not nominal
         */
        protected double getProbability(int attIndex, int valueIndex) throws Exception {

            if (!m_clusterInstances.attribute(attIndex).isNominal()) {
                throw new Exception("getProbability: attribute is not nominal");
            }

            if (m_attStats[attIndex].totalCount <= 0) {
                return 0;
            }

            return (double) m_attStats[attIndex].nominalCounts[valueIndex] / (double) m_attStats[attIndex].totalCount;
        }

        /**
         * Returns the standard deviation of a numeric attribute
         * 
         * @param attIndex the index of the attribute
         * @return the standard deviation
         * @throws Exception if an error occurs
         */
        protected double getStandardDev(int attIndex) throws Exception {
            if (!m_clusterInstances.attribute(attIndex).isNumeric()) {
                throw new Exception("getStandardDev: attribute is not numeric");
            }

            m_attStats[attIndex].numericStats.calculateDerived();
            double stdDev = m_attStats[attIndex].numericStats.stdDev;
            if (Double.isNaN(stdDev) || Double.isInfinite(stdDev)) {
                return m_acuity;
            }

            return Math.max(m_acuity, stdDev);
        }

        /**
         * Update attribute stats using the supplied instance.
         * 
         * @param updateInstance the instance for updating
         * @param delete         true if the values of the supplied instance are to be
         *                       removed from the statistics
         */
        protected void updateStats(Instance updateInstance, boolean delete) {

            if (m_attStats == null) {
                m_attStats = new AttributeStats[m_numAttributes];
                for (int i = 0; i < m_numAttributes; i++) {
                    m_attStats[i] = new AttributeStats();
                    if (m_clusterInstances.attribute(i).isNominal()) {
                        m_attStats[i].nominalCounts = new int[m_clusterInstances.attribute(i).numValues()];
                    } else {
                        m_attStats[i].numericStats = new Stats();
                    }
                }
            }
            for (int i = 0; i < m_numAttributes; i++) {
                if (!updateInstance.isMissing(i)) {
                    double value = updateInstance.value(i);
                    if (m_clusterInstances.attribute(i).isNominal()) {
                        m_attStats[i].nominalCounts[(int) value] += (delete) ? (-1.0 * updateInstance.weight()) : updateInstance.weight();
                        m_attStats[i].totalCount += (delete) ? (-1.0 * updateInstance.weight()) : updateInstance.weight();
                    } else {
                        if (delete) {
                            m_attStats[i].numericStats.subtract(value, updateInstance.weight());
                        } else {
                            m_attStats[i].numericStats.add(value, updateInstance.weight());
                        }
                    }
                }
            }
            m_totalInstances += (delete) ? (-1.0 * updateInstance.weight()) : (updateInstance.weight());
        }

        /**
         * Recursively assigns numbers to the nodes in the tree.
         * 
         * @param cl_num an <code>int[]</code> value
         * @throws Exception if an error occurs
         */
        private void assignClusterNums(int[] cl_num) throws Exception {
            if (m_children != null && m_children.size() < 2) {
                throw new Exception("assignClusterNums: tree not built correctly!");
            }

            m_clusterNum = cl_num[0];
            cl_num[0]++;
            if (m_children != null) {
                for (int i = 0; i < m_children.size(); i++) {
                    CNode child = m_children.get(i);
                    child.assignClusterNums(cl_num);
                }
            }
        }

        /**
         * Recursively build a string representation of the Cobweb tree
         * 
         * @param depth depth of this node in the tree
         * @param text  holds the string representation
         */
        protected void dumpTree(int depth, StringBuffer text) {

            if (depth == 0) {
                determineNumberOfClusters();
            }

            if (m_children == null) {
                text.append("\n");
                for (int j = 0; j < depth; j++) {
                    text.append("|   ");
                }
                text.append("leaf " + m_clusterNum + " [" + m_clusterInstances.numInstances() + "]");
            } else {
                for (int i = 0; i < m_children.size(); i++) {
                    text.append("\n");
                    for (int j = 0; j < depth; j++) {
                        text.append("|   ");
                    }
                    text.append("node " + m_clusterNum + " [" + m_clusterInstances.numInstances() + "]");
                    m_children.get(i).dumpTree(depth + 1, text);
                }
            }
        }

        /**
         * Returns the instances at this node as a string. Appends the cluster number of
         * the child that each instance belongs to.
         * 
         * @return a <code>String</code> value
         * @throws Exception if an error occurs
         */
        protected String dumpData() throws Exception {
            if (m_children == null) {
                return m_clusterInstances.toString();
            }

            // construct instances string with cluster numbers attached
            CNode tempNode = new CNode(m_numAttributes);
            tempNode.m_clusterInstances = new Instances(m_clusterInstances, 1);
            for (int i = 0; i < m_children.size(); i++) {
                tempNode.addChildNode(m_children.get(i));
            }
            Instances tempInst = tempNode.m_clusterInstances;
            tempNode = null;

            Add af = new Add();
            af.setAttributeName("Cluster");
            String labels = "";
            for (int i = 0; i < m_children.size(); i++) {
                CNode temp = m_children.get(i);
                labels += ("C" + temp.m_clusterNum);
                if (i < m_children.size() - 1) {
                    labels += ",";
                }
            }
            af.setNominalLabels(labels);
            af.setInputFormat(tempInst);
            tempInst = Filter.useFilter(tempInst, af);
            tempInst.setRelationName("Cluster " + m_clusterNum);

            int z = 0;
            for (int i = 0; i < m_children.size(); i++) {
                CNode temp = m_children.get(i);
                for (int j = 0; j < temp.m_clusterInstances.numInstances(); j++) {
                    tempInst.instance(z).setValue(m_numAttributes, i);
                    z++;
                }
            }
            return tempInst.toString();
        }

        /**
         * Recursively generate the graph string for the Cobweb tree.
         * 
         * @param text holds the graph string
         * @throws Exception if generation fails
         */
        protected void graphTree(StringBuffer text) throws Exception {

            text.append("N" + m_clusterNum + " [label=\"" + ((m_children == null) ? "leaf " : "node ") + m_clusterNum + " " + " (" + m_clusterInstances.numInstances() + ")\" " + ((m_children == null) ? "shape=box style=filled " : "") + (m_saveInstances ? "data =\n" + dumpData() + "\n,\n" : "") + "]\n");
            if (m_children != null) {
                for (int i = 0; i < m_children.size(); i++) {
                    CNode temp = m_children.get(i);
                    text.append("N" + m_clusterNum + "->" + "N" + temp.m_clusterNum + "\n");
                }

                for (int i = 0; i < m_children.size(); i++) {
                    CNode temp = m_children.get(i);
                    temp.graphTree(text);
                }
            }
        }

    }

    /**
     * Normal constant.
     */
    protected static final double m_normal = 1.0 / (2 * Math.sqrt(Math.PI));

    /**
     * Acuity (minimum standard deviation).
     */
    protected double m_acuity = 1.0;

    /**
     * Cutoff (minimum category utility).
     */
    protected double m_cutoff = 0.01 * Cobweb.m_normal;

    /**
     * Holds the root of the Cobweb tree.
     */
    protected CNode m_cobwebTree = null;

    /**
     * Number of clusters (nodes in the tree). Must never be queried directly, only
     * via the method numberOfClusters(). Otherwise it's not guaranteed that it
     * contains the correct value.
     * 
     * @see #numberOfClusters()
     * @see #m_numberOfClustersDetermined
     */
    protected int m_numberOfClusters = -1;

    /** whether the number of clusters was already determined */
    protected boolean m_numberOfClustersDetermined = false;

    /** the number of splits that happened */
    protected int m_numberSplits;

    /** the number of merges that happened */
    protected int m_numberMerges;

    /**
     * Output instances in graph representation of Cobweb tree (Allows instances at
     * nodes in the tree to be visualized in the Explorer).
     */
    protected boolean m_saveInstances = false;

    /**
     * default constructor
     */
    public Cobweb() {
        super();

        m_SeedDefault = 42;
        setSeed(m_SeedDefault);
    }

    /**
     * Returns a string describing this clusterer
     * 
     * @return a description of the evaluator suitable for displaying in the
     *         explorer/experimenter gui
     */
    public String globalInfo() {
        return "Class implementing the Cobweb and Classit clustering algorithms.\n\n" + "Note: the application of node operators (merging, splitting etc.) in " + "terms of ordering and priority differs (and is somewhat ambiguous) " + "between the original Cobweb and Classit papers. This algorithm always " + "compares the best host, adding a new leaf, merging the two best hosts, " + "and splitting the best host when considering where to place a new " + "instance.\n\n" + "For more information see:\n\n" + getTechnicalInformation().toString();
    }

    /**
     * Returns an instance of a TechnicalInformation object, containing detailed
     * information about the technical background of this class, e.g., paper
     * reference or book this class is based on.
     * 
     * @return the technical information about this class
     */
    @Override
    public TechnicalInformation getTechnicalInformation() {
        TechnicalInformation result;
        TechnicalInformation additional;

        result = new TechnicalInformation(Type.ARTICLE);
        result.setValue(Field.AUTHOR, "D. Fisher");
        result.setValue(Field.YEAR, "1987");
        result.setValue(Field.TITLE, "Knowledge acquisition via incremental conceptual clustering");
        result.setValue(Field.JOURNAL, "Machine Learning");
        result.setValue(Field.VOLUME, "2");
        result.setValue(Field.NUMBER, "2");
        result.setValue(Field.PAGES, "139-172");

        additional = result.add(Type.ARTICLE);
        additional.setValue(Field.AUTHOR, "J. H. Gennari and P. Langley and D. Fisher");
        additional.setValue(Field.YEAR, "1990");
        additional.setValue(Field.TITLE, "Models of incremental concept formation");
        additional.setValue(Field.JOURNAL, "Artificial Intelligence");
        additional.setValue(Field.VOLUME, "40");
        additional.setValue(Field.PAGES, "11-61");

        return result;
    }

    /**
     * Returns default capabilities of the clusterer.
     * 
     * @return the capabilities of this clusterer
     */
    @Override
    public Capabilities getCapabilities() {
        Capabilities result = super.getCapabilities();
        result.disableAll();
        result.enable(Capability.NO_CLASS);

        // attributes
        result.enable(Capability.NOMINAL_ATTRIBUTES);
        result.enable(Capability.NUMERIC_ATTRIBUTES);
        result.enable(Capability.DATE_ATTRIBUTES);
        result.enable(Capability.MISSING_VALUES);

        // other
        result.setMinimumNumberInstances(0);

        return result;
    }

    /**
     * Builds the clusterer.
     * 
     * @param data the training instances.
     * @throws Exception if something goes wrong.
     */
    @Override
    public void buildClusterer(Instances data) throws Exception {
        m_numberOfClusters = -1;
        m_cobwebTree = null;
        m_numberSplits = 0;
        m_numberMerges = 0;

        // can clusterer handle the data?
        getCapabilities().testWithFail(data);

        // randomize the instances
        data = new Instances(data);

        if (getSeed() >= 0) {
            data.randomize(new Random(getSeed()));
        }

        for (int i = 0; i < data.numInstances(); i++) {
            updateClusterer(data.instance(i));
        }

        updateFinished();
    }

    /**
     * Singals the end of the updating.
     */
    @Override
    public void updateFinished() {
        determineNumberOfClusters();
    }

    /**
     * Classifies a given instance.
     * 
     * @param instance the instance to be assigned to a cluster
     * @return the number of the assigned cluster as an interger if the class is
     *         enumerated, otherwise the predicted value
     * @throws Exception if instance could not be classified successfully
     */
    @Override
    public int clusterInstance(Instance instance) throws Exception {
        CNode host = m_cobwebTree;
        CNode temp = null;

        determineNumberOfClusters();

        do {
            if (host.m_children == null) {
                temp = null;
                break;
            }

            // host.updateStats(instance, false);
            temp = host.findHost(instance, true);
            // host.updateStats(instance, true);

            if (temp != null) {
                host = temp;
            }
        } while (temp != null);

        return host.m_clusterNum;
    }

    /**
     * determines the number of clusters if necessary
     * 
     * @see #m_numberOfClusters
     * @see #m_numberOfClustersDetermined
     */
    protected void determineNumberOfClusters() {
        if (!m_numberOfClustersDetermined && (m_cobwebTree != null)) {
            int[] numClusts = new int[1];
            numClusts[0] = 0;
            try {
                m_cobwebTree.assignClusterNums(numClusts);
            } catch (Exception e) {
                e.printStackTrace();
                numClusts[0] = 0;
            }
            m_numberOfClusters = numClusts[0];

            m_numberOfClustersDetermined = true;
        }
    }

    /**
     * Returns the number of clusters.
     * 
     * @return the number of clusters
     */
    @Override
    public int numberOfClusters() {
        determineNumberOfClusters();
        return m_numberOfClusters;
    }

    /**
     * Get the root of the tree.
     * 
     * @return the root of the tree.
     */
    public CNode getTreeRoot() {
        return m_cobwebTree;
    }

    /**
     * Adds an instance to the clusterer.
     * 
     * @param newInstance the instance to be added
     * @throws Exception if something goes wrong
     */
    @Override
    public void updateClusterer(Instance newInstance) throws Exception {
        m_numberOfClustersDetermined = false;

        if (m_cobwebTree == null) {
            m_cobwebTree = new CNode(newInstance.numAttributes(), newInstance);
        } else {
            m_cobwebTree.addInstance(newInstance);
        }
    }

    /**
     * Adds an instance to the Cobweb tree.
     * 
     * @param newInstance the instance to be added
     * @throws Exception if something goes wrong
     * @deprecated updateClusterer(Instance) should be used instead
     * @see #updateClusterer(Instance)
     */
    @Deprecated
    public void addInstance(Instance newInstance) throws Exception {
        updateClusterer(newInstance);
    }

    /**
     * Returns an enumeration describing the available options.
     * 
     * @return an enumeration of all the available options.
     **/
    @Override
    public Enumeration<Option> listOptions() {
        Vector<Option> result = new Vector<Option>();

        result.addElement(new Option("\tAcuity.\n" + "\t(default=1.0)", "A", 1, "-A <acuity>"));

        result.addElement(new Option("\tCutoff.\n" + "\t(default=0.002)", "C", 1, "-C <cutoff>"));

        result.addElement(new Option("\tSave instance data.", "save-data", 0, "-save-data"));

        result.addAll(Collections.list(super.listOptions()));

        return result.elements();
    }

    /**
     * Parses a given list of options.
     * <p/>
     * 
     * <!-- options-start --> Valid options are:
     * <p/>
     * 
     * <pre>
     *  -A &lt;acuity&gt;
     *  Acuity.
     *  (default=1.0)
     * </pre>
     * 
     * <pre>
     *  -C &lt;cutoff&gt;
     *  Cutoff.
     *  (default=0.002)
     * </pre>
     * 
     * <pre>
     *  -save-data
     *  Save instance data.
     * </pre>
     * 
     * <pre>
     *  -S &lt;num&gt;
     *  Random number seed.
     *  (default 42)
     * </pre>
     * 
     * <pre>
     *  -output-debug-info
     *  If set, clusterer is run in debug mode and
     *  may output additional info to the console
     * </pre>
     * 
     * <pre>
     *  -do-not-check-capabilities
     *  If set, clusterer capabilities are not checked before clusterer is built
     *  (use with caution).
     * </pre>
     * 
     * <!-- options-end -->
     * 
     * @param options the list of options as an array of strings
     * @throws Exception if an option is not supported
     */
    @Override
    public void setOptions(String[] options) throws Exception {
        String optionString;

        optionString = Utils.getOption('A', options);
        if (optionString.length() != 0) {
            Double temp = new Double(optionString);
            setAcuity(temp.doubleValue());
        } else {
            m_acuity = 1.0;
        }
        optionString = Utils.getOption('C', options);
        if (optionString.length() != 0) {
            Double temp = new Double(optionString);
            setCutoff(temp.doubleValue());
        } else {
            m_cutoff = 0.01 * Cobweb.m_normal;
        }

        setSaveInstanceData(Utils.getFlag("save-data", options));

        super.setOptions(options);

        Utils.checkForRemainingOptions(options);
    }

    /**
     * Returns the tip text for this property
     * 
     * @return tip text for this property suitable for displaying in the
     *         explorer/experimenter gui
     */
    public String acuityTipText() {
        return "set the minimum standard deviation for numeric attributes";
    }

    /**
     * set the acuity.
     * 
     * @param a the acuity value
     */
    public void setAcuity(double a) {
        m_acuity = a;
    }

    /**
     * get the acuity value
     * 
     * @return the acuity
     */
    public double getAcuity() {
        return m_acuity;
    }

    /**
     * Returns the tip text for this property
     * 
     * @return tip text for this property suitable for displaying in the
     *         explorer/experimenter gui
     */
    public String cutoffTipText() {
        return "set the category utility threshold by which to prune nodes";
    }

    /**
     * set the cutoff
     * 
     * @param c the cutof
     */
    public void setCutoff(double c) {
        m_cutoff = c;
    }

    /**
     * get the cutoff
     * 
     * @return the cutoff
     */
    public double getCutoff() {
        return m_cutoff;
    }

    /**
     * Returns the tip text for this property
     * 
     * @return tip text for this property suitable for displaying in the
     *         explorer/experimenter gui
     */
    public String saveInstanceDataTipText() {
        return "save instance information for visualization purposes";
    }

    /**
     * Get the value of saveInstances.
     * 
     * @return Value of saveInstances.
     */
    public boolean getSaveInstanceData() {

        return m_saveInstances;
    }

    /**
     * Set the value of saveInstances.
     * 
     * @param newsaveInstances Value to assign to saveInstances.
     */
    public void setSaveInstanceData(boolean newsaveInstances) {

        m_saveInstances = newsaveInstances;
    }

    /**
     * Gets the current settings of Cobweb.
     * 
     * @return an array of strings suitable for passing to setOptions()
     */
    @Override
    public String[] getOptions() {

        Vector<String> result = new Vector<String>();

        result.add("-A");
        result.add("" + m_acuity);
        result.add("-C");
        result.add("" + m_cutoff);

        if (getSaveInstanceData()) {
            result.add("-save-data");
        }

        Collections.addAll(result, super.getOptions());

        return result.toArray(new String[result.size()]);
    }

    /**
     * Returns a description of the clusterer as a string.
     * 
     * @return a string describing the clusterer.
     */
    @Override
    public String toString() {
        StringBuffer text = new StringBuffer();
        if (m_cobwebTree == null) {
            return "Cobweb hasn't been built yet!";
        } else {
            m_cobwebTree.dumpTree(0, text);
            return "Number of merges: " + m_numberMerges + "\nNumber of splits: " + m_numberSplits + "\nNumber of clusters: " + numberOfClusters() + "\n" + text.toString() + "\n\n";

        }
    }

    /**
     * Returns the type of graphs this class represents
     * 
     * @return Drawable.TREE
     */
    @Override
    public int graphType() {
        return Drawable.TREE;
    }

    /**
     * Generates the graph string of the Cobweb tree
     * 
     * @return a <code>String</code> value
     * @throws Exception if an error occurs
     */
    @Override
    public String graph() throws Exception {
        StringBuffer text = new StringBuffer();

        text.append("digraph CobwebTree {\n");
        m_cobwebTree.graphTree(text);
        text.append("}\n");
        return text.toString();
    }

    /**
     * Returns the tip text for this property
     * 
     * @return tip text for this property suitable for displaying in the
     *         explorer/experimenter gui
     */
    @Override
    public String seedTipText() {
        String result = super.seedTipText() + " Use -1 for no randomization.";

        return result;
    }

    /**
     * Main method.
     * 
     * @param argv the commandline options
     */
    public static void main(String[] argv) {
        runClusterer(new Cobweb(), argv);
    }
}
